Exercise 5
Get a polygons map of the lowest administrative unit possible.
Get a table of variables for those units. At least 3 numerical variables.
Preprocess both tables and get them ready for merging.
Do the merging, making the changes needed so that you keep the most rows.
In [ ]:
!pip install fiona
Collecting fiona
Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.6/56.6 kB 3.7 MB/s eta 0:00:00
Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.11/dist-packages (from fiona) (25.3.0)
Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from fiona) (2025.6.15)
Requirement already satisfied: click~=8.0 in /usr/local/lib/python3.11/dist-packages (from fiona) (8.2.1)
Collecting click-plugins>=1.0 (from fiona)
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting cligj>=0.5 (from fiona)
Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.3/17.3 MB 92.0 MB/s eta 0:00:00
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: cligj, click-plugins, fiona
Successfully installed click-plugins-1.1.1.2 cligj-0.7.2 fiona-1.10.1
In [ ]:
# Polygons map: US counties stored in a remote GeoPackage file
import geopandas as gpd
import os  # NOTE(review): not used in the cells visible here
usaDataLink="https://github.com/Derick047/PC6_Parte2/raw/refs/heads/main/Data%20utilizada/usa_counties/usa_counties.gpkg"
# List the layers contained in the GeoPackage before choosing one to read
gpd.list_layers(usaDataLink)
Out[ ]:
| name | geometry_type | |
|---|---|---|
| 0 | counties | MultiPolygon |
In [ ]:
# Read the "counties" layer of the GeoPackage into a GeoDataFrame
datadisMap = gpd.read_file(usaDataLink, layer='counties')
In [ ]:
# Summarise the map table: column names, non-null counts and dtypes
datadisMap.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 3235 entries, 0 to 3234 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 STATEFP 3235 non-null object 1 COUNTYFP 3235 non-null object 2 COUNTYNS 3235 non-null object 3 AFFGEOID 3235 non-null object 4 GEOID 3235 non-null object 5 NAME 3235 non-null object 6 NAMELSAD 3235 non-null object 7 STUSPS 3235 non-null object 8 STATE_NAME 3235 non-null object 9 LSAD 3235 non-null object 10 ALAND 3235 non-null int64 11 AWATER 3235 non-null int64 12 geometry 3235 non-null geometry dtypes: geometry(1), int64(2), object(10) memory usage: 328.7+ KB
In [ ]:
# Remote folder holding the attribute tables; both CSVs live side by side in it
data_folder_url = "https://github.com/Derick047/PC6_Parte2/raw/main/Data%20utilizada"
people_url = data_folder_url + "/People.csv"
income_url = data_folder_url + "/Income.csv"
In [ ]:
# pandas must be imported here: this is the first cell that uses `pd`, and
# without the import a top-to-bottom run of the notebook stops with NameError.
import pandas as pd

# Load the two attribute tables; the files are decoded as ISO-8859-1 (Latin-1)
people = pd.read_csv(people_url, encoding="ISO-8859-1")
income = pd.read_csv(income_url, encoding="ISO-8859-1")
In [ ]:
# Check the column names of both attribute tables
print(people.columns)
print(income.columns)
Index(['FIPS', 'State', 'County', 'Attribute', 'Value'], dtype='object') Index(['FIPS', 'State', 'County', 'Attribute', 'Value'], dtype='object')
In [ ]:
# Same column names of the People table, shown as a plain Python list
print(people.columns.to_list())
['FIPS', 'State', 'County', 'Attribute', 'Value']
In [ ]:
# Extract one attribute from each long-format table (one row per FIPS/Attribute)
# and turn it into a two-column table: FIPS plus the attribute as a numeric column.
# Values that cannot be parsed as numbers become NaN (errors="coerce").

# NOTE(review): despite its name, `poverty` holds the home-ownership percentage.
own_home_rows = people["Attribute"] == "OwnHomePct"
poverty = (
    people.loc[own_home_rows, ["FIPS", "Value"]]
    .assign(Value=lambda tbl: pd.to_numeric(tbl["Value"], errors="coerce"))
    .rename(columns={"Value": "OwnHomePct"})
)

# NOTE(review): `income` is overwritten with its filtered version, so running
# this cell a second time fails because the "Attribute" column is gone.
median_income_rows = income["Attribute"] == "Median_Household_Income_2021"
income = (
    income.loc[median_income_rows, ["FIPS", "Value"]]
    .assign(Value=lambda tbl: pd.to_numeric(tbl["Value"], errors="coerce"))
    .rename(columns={"Value": "MedianIncome"})
)
In [ ]:
# Normalise the join key in both tables: FIPS as text, left-padded with zeros
# to five characters
for table in (poverty, income):
    table["FIPS"] = table["FIPS"].astype(str).str.zfill(5)

# Outer join: a FIPS code present in only one of the two tables is still kept
socio = poverty.merge(income, how="outer", on="FIPS")

# Left join onto the map: every polygon is kept, with NaN where no data matched
counties2 = datadisMap.merge(socio, how="left", left_on="GEOID", right_on="FIPS")
In [ ]:
# List every distinct attribute name available in the People table
people["Attribute"].unique()
Out[ ]:
array(['Age65AndOlderNum2020', 'Age65AndOlderPct2020',
'AsianNonHispanicNum2020', 'AsianNonHispanicPct2020', 'AvgHHSize',
'BlackNonHispanicNum2020', 'BlackNonHispanicPct2020',
'Ed1LessThanHSNum', 'Ed1LessThanHSPct', 'Ed2HSDiplomaOnlyNum',
'Ed2HSDiplomaOnlyPct', 'Ed3SomeCollegeNum', 'Ed3SomeCollegePct',
'Ed4AssocDegreeNum', 'Ed4AssocDegreePct', 'Ed5CollegePlusNum',
'Ed5CollegePlusPct', 'ESTIMATESBASE2020', 'FemaleHHNum',
'FemaleHHPct', 'ForeignBornAfricaNum', 'ForeignBornAfricaPct',
'ForeignBornAsiaNum', 'ForeignBornAsiaPct', 'ForeignBornCaribNum',
'ForeignBornCaribPct', 'ForeignBornCentralSouthAmNum',
'ForeignBornCentralSouthAmPct', 'ForeignBornEuropeNum',
'ForeignBornEuropePct', 'ForeignBornMexNum', 'ForeignBornMexPct',
'ForeignBornNum', 'ForeignBornPct', 'HH65PlusAloneNum',
'HH65PlusAlonePct', 'HispanicNum2020', 'HispanicPct2020',
'HispanicPopChangeRate1020', 'LandAreaSQMiles2020',
'MultipleRaceNum2020', 'MultipleRacePct2020',
'MultipleRacePopChangeRate1020',
'NativeAmericanNonHispanicNum2020',
'NativeAmericanNonHispanicPct2020',
'Natural_Change_Rate_2020_2021', 'NaturalChange1019',
'NaturalChangeRate1019', 'NaturalChg2020_2021',
'Net_InterMigration_2020_2021', 'Net_InterMigrationRate_2020_2021',
'Net_International_Migration_2010_2019',
'Net_International_Migration_Rate_2010_2019',
'Net_Migration_2020_2021', 'Net_Migration_Rate_2020_2021',
'NetMigration1019', 'NetMigrationRate1019', 'NonEnglishHHNum',
'NonEnglishHHPct', 'NonHispAsianPopChangeRate1020',
'NonHispBlackPopChangeRate1020', 'NonHispNatAmerPopChangeRate1020',
'NonHispWhitePopChangeRate1020', 'OwnHomeNum', 'OwnHomePct',
'Pop_change_Rate_2020_2021', 'PopChangeRate1019',
'PopChangeRate1020', 'PopDensity2020', 'POPESTIMATE2020',
'POPESTIMATE2021', 'TotalHH', 'TotalOccHU', 'TotalPop2020',
'TotalPop25Plus', 'TotalPopACS', 'TotalPopEst2010',
'TotalPopEst2011', 'TotalPopEst2012', 'TotalPopEst2013',
'TotalPopEst2014', 'TotalPopEst2015', 'TotalPopEst2016',
'TotalPopEst2017', 'TotalPopEst2018', 'TotalPopEst2019',
'TotalPopEstBase2010', 'Under18Num2020', 'Under18Pct2020',
'WhiteNonHispanicNum2020', 'WhiteNonHispanicPct2020'], dtype=object)
In [ ]:
# Rows of the People table for the home-ownership percentage, keeping all
# columns, with Value converted to numeric (unparseable entries become NaN)
is_own_home_pct = people["Attribute"] == "OwnHomePct"
own_home_df = people[is_own_home_pct].assign(
    Value=lambda tbl: pd.to_numeric(tbl["Value"], errors="coerce")
)
In [ ]:
import seaborn as sea
import matplotlib.pyplot as plt

# Histogram of the home-ownership percentage, drawn on an explicit Axes object
fig, ax = plt.subplots(figsize=(10, 6))
sea.histplot(own_home_df["Value"], color='green', bins=30, ax=ax)
ax.set_title("Distribución de hogares con vivienda propia en EE.UU.")
ax.set_xlabel("Porcentaje (%)")
ax.set_ylabel("Frecuencia")
ax.grid(True)
plt.show()
In [ ]:
import geopandas as gpd
import pandas as pd
# Read the county polygons (layer "counties" of the remote GeoPackage file)
usa_link = "https://github.com/Derick047/PC6_Parte2/raw/main/Data%20utilizada/usa_counties/usa_counties.gpkg"
counties = gpd.read_file(usa_link, layer="counties")

# Make sure the county identifier is stored as text
geoid_as_text = counties["GEOID"].astype(str)
counties["GEOID"] = geoid_as_text
In [ ]:
# Confirm the merged map carries the map columns plus the attribute columns
counties2.columns.to_list()
Out[ ]:
['STATEFP', 'COUNTYFP', 'COUNTYNS', 'AFFGEOID', 'GEOID', 'NAME', 'NAMELSAD', 'STUSPS', 'STATE_NAME', 'LSAD', 'ALAND', 'AWATER', 'geometry', 'FIPS', 'OwnHomePct', 'MedianIncome']
In [ ]:
!pip install folium matplotlib mapclassify
Requirement already satisfied: folium in /usr/local/lib/python3.11/dist-packages (0.19.7) Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (3.10.0) Collecting mapclassify Downloading mapclassify-2.9.0-py3-none-any.whl.metadata (3.1 kB) Requirement already satisfied: branca>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from folium) (0.8.1) Requirement already satisfied: jinja2>=2.9 in /usr/local/lib/python3.11/dist-packages (from folium) (3.1.6) Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from folium) (2.0.2) Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from folium) (2.32.3) Requirement already satisfied: xyzservices in /usr/local/lib/python3.11/dist-packages (from folium) (2025.4.0) Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.3.2) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (4.58.4) Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.4.8) Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (24.2) Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (11.2.1) Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (3.2.3) Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (2.9.0.post0) Requirement already satisfied: networkx>=3.2 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (3.5) Requirement already satisfied: pandas>=2.1 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (2.2.2) Requirement already 
satisfied: scikit-learn>=1.4 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (1.6.1) Requirement already satisfied: scipy>=1.12 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (1.15.3) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.9->folium) (3.0.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2025.2) Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2025.2) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.7->matplotlib) (1.17.0) Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.4->mapclassify) (1.5.1) Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.4->mapclassify) (3.6.0) Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->folium) (3.4.2) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->folium) (3.10) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->folium) (2.4.0) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->folium) (2025.6.15) Downloading mapclassify-2.9.0-py3-none-any.whl (286 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 286.7/286.7 kB 14.7 MB/s eta 0:00:00 Installing collected packages: mapclassify Successfully installed mapclassify-2.9.0
In [ ]:
# Interactive choropleth of the home-ownership percentage per county.
# Must stay the last expression of the cell so the notebook renders the map.
counties2.explore(
    # variable to colour by, and how to colour it
    column="OwnHomePct",
    cmap="Greens",
    scheme="fisherjenks",
    # legend settings (colorbar switched off)
    legend=True,
    legend_kwds={"colorbar": False},
    # no tooltip; selected fields are listed in the popup instead
    tooltip=False,
    popup=["NAME", "STATE_NAME", "OwnHomePct"],
)
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook